
# Do not echo recipe commands; the recipes below print their own progress
# messages with echo.
.SILENT:

# Delete a target whose recipe exits with an error. Several recipes in this
# file write their output through shell redirection to $@, which creates the
# file before the command has succeeded; without this, a failed run would
# leave a truncated file that make treats as up to date.
.DELETE_ON_ERROR:

include ../config.mk

# Toolchain and feature settings. The @...@ placeholders are substituted when
# this template is instantiated (presumably by configure -- confirm). Every
# `?=` default yields to a value already set by ../config.mk, the environment
# or the make command line.

# GNU Make predefines CXX (origin "default"), so `CXX ?= ...` would never take
# effect. Replace only the built-in value; anything set by config.mk, the
# environment or the command line is left untouched.
ifeq ($(origin CXX),default)
CXX             = @CXX@
endif
NVCC           ?= @NVCC@
LINKER         ?= @CXX@
CPPFLAGS       ?= @CPPFLAGS@
CXXFLAGS       ?= @CXXFLAGS@
NVCCFLAGS      ?= @NVCCFLAGS@
LDFLAGS        ?= @LDFLAGS@ @LIBS@
DOXYGEN        ?= @DX_DOXYGEN@
PYBUILDFLAGS   ?= @PYBUILDFLAGS@
PYINSTALLFLAGS ?= @PYINSTALLFLAGS@

# Feature switches; a value of 1 adds the matching object files below.
HAVE_RECVMSG  ?= @HAVE_RECVMSG@
HAVE_RDMA     ?= @HAVE_RDMA@

HAVE_CUDA     ?= @HAVE_CUDA@

GPU_ARCHS     ?= @GPU_ARCHS@

# CUDA installation layout, derived from CUDA_HOME unless overridden.
CUDA_HOME     ?= @CUDA_HOME@
CUDA_LIBDIR   ?= $(CUDA_HOME)/lib
CUDA_LIBDIR64 ?= $(CUDA_HOME)/lib64
CUDA_INCDIR   ?= $(CUDA_HOME)/include

# -gencode options; also passed to the pruning tool in the CUDA section.
NVCC_GENCODE ?= @NVCC_GENCODE@

# Object files linked into libbifrost. The first group is always part of the
# list; the groups after it are appended according to the HAVE_* switches.
LIBBIFROST_OBJS = \
  common.o memory.o affinity.o hw_locality.o cuda.o \
  fileutils.o testsuite.o \
  ring.o ring_impl.o \
  Socket.o \
  array.o unpack.o quantize.o \
  proclog.o

# Objects whose sources need recvmsg to compile.
ifeq ($(HAVE_RECVMSG),1)
  LIBBIFROST_OBJS += address.o udp_socket.o packet_capture.o packet_writer.o
endif

# RDMA support object.
ifeq ($(HAVE_RDMA),1)
  LIBBIFROST_OBJS += rdma.o
endif

# Objects whose sources need the CUDA Toolkit to compile.
ifeq ($(HAVE_CUDA),1)
  LIBBIFROST_OBJS += \
  transpose.o \
  fft.o fft_kernels.o \
  fdmt.o \
  map.o \
  trace.o \
  linalg.o linalg_kernels.o \
  romein.o romein_kernels.o \
  reduce.o \
  fir.o \
  guantize.o gunpack.o
endif

# Generated .jit files (built by the %.jit rule) that map.o depends on.
JIT_SOURCES ?= \
  Complex.hpp.jit Vector.hpp.jit \
  IndexArray.cuh.jit ArrayIndexer.cuh.jit ShapeIndexer.cuh.jit \
  int_fastdiv.h.jit

# Build description files; a change to either one triggers object rebuilds.
MAKEFILES = ../config.mk Makefile

#NVCCFLAGS += -Xcudafe "--diag_suppress=unrecognized_gcc_pragma"
#NVCCFLAGS += --expt-relaxed-constexpr

# Output directory for the shared library and the header search root.
LIB_DIR = ../lib
INC_DIR = .
CPPFLAGS += -I. -I$(INC_DIR)

# Derived file names:
#   LIBBIFROST_VERSION_FILE  linker version script generated from the headers
#   LIBBIFROST_SO_STEM       unversioned library path inside LIB_DIR
#   LIBBIFROST_SO            fully versioned library path (the real build output)
#   LIBBIFROST_SO_NAME       file name of LIBBIFROST_SO without the directory
LIBBIFROST_VERSION_FILE = $(LIBBIFROST_NAME).version
LIBBIFROST_SO_STEM      = $(LIB_DIR)/$(LIBBIFROST_NAME)$(SO_EXT)
LIBBIFROST_SO           = $(LIBBIFROST_SO_STEM).$(LIBBIFROST_MAJOR).$(LIBBIFROST_MINOR)
LIBBIFROST_SO_NAME      = $(LIBBIFROST_NAME)$(SO_EXT).$(LIBBIFROST_MAJOR).$(LIBBIFROST_MINOR)

# Building the versioned shared library is the only job of `all`.
.PHONY: all
all: $(LIBBIFROST_SO)

# Generate the linker version script: every function prototype that ctags
# finds in the bifrost headers is listed under "global:", everything else is
# made local. The script is assembled in a temporary file and renamed into
# place only when every step succeeded, so a failed or interrupted run never
# leaves a truncated script that looks newer than the headers.
$(LIBBIFROST_VERSION_FILE): $(INC_DIR)/bifrost/*.h
	@echo "Generating $(LIBBIFROST_VERSION_FILE)"
	{ echo "VERS_$(LIBBIFROST_MAJOR).$(LIBBIFROST_MINOR) {" && \
	  echo "  global:" && \
	  @CTAGS@ -x --c-kinds=p $^ | @AWK@ '{print "    " $$1 ";"}' && \
	  echo "  local:" && \
	  echo "    *;" && \
	  echo "};"; \
	} > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

ifeq ($(HAVE_CUDA),1)
# TODO: Need to deal with 32/64 detection here
LIBCUFFT_STATIC = $(CUDA_LIBDIR64)/libcufft_static.a
# All PTX archs included in the lib (typically only one), de-duplicated and in
# ascending numeric order. The pattern accepts any number of digits so that
# three-digit architectures are not truncated, and uses POSIX extended regex
# syntax instead of the GNU-only -P option.
CUFFT_PTX_ARCHS := $(shell @CUOBJDUMP@ --list-ptx $(LIBCUFFT_STATIC) | grep -Eo "sm_[0-9]+" | cut -d_ -f2 | sort -n -u)
# Latest PTX arch included in the lib (last entry of the sorted list)
CUFFT_PTX_LATEST_ARCH := $(lastword $(CUFFT_PTX_ARCHS))
CUFFT_STATIC_GENCODE = -gencode arch=compute_$(CUFFT_PTX_LATEST_ARCH),\"code=compute_$(CUFFT_PTX_LATEST_ARCH)\"

# We prune out all archs except those to be included in libbifrost *and*
#   the latest PTX arch included in libcufft_static.
#   E.g., We may have GPU_ARCHS="35 61" but libcufft_static might only
#     include sm_60 and compute_60, so we need to keep compute_60 in order
#     to support sm_61.
libcufft_static_pruned.a: $(LIBCUFFT_STATIC) Makefile
	@NVPRUNE@ -o $@ $(NVCC_GENCODE) $(CUFFT_STATIC_GENCODE) $<

# Note: This needs to be compiled with "-dc" to make CUFFT callbacks work
fft_kernels.o: fft_kernels.cu fft_kernels.h Makefile
	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(GCCFLAGS)" -dc $(OUTPUT_OPTION) $<

# Device-link fft_kernels.o against the pruned static cuFFT library.
# TODO: "nvcc -dlink ..." does not error or warn when a -lblah is not found,
#   hence the explicit ls check that the pruned library exists.
_cuda_device_link.o: Makefile fft_kernels.o libcufft_static_pruned.a
	@echo "Linking _cuda_device_link.o"
	@ls ./libcufft_static_pruned.a > /dev/null
	$(NVCC) -dlink -o $@ $(NVCCFLAGS) fft_kernels.o -L. -lcufft_static_pruned

CUDA_DEVICE_LINK_OBJ = _cuda_device_link.o
else
CUDA_DEVICE_LINK_OBJ =
endif

# Options handed to the linker through -Wl: the soname, preceded on Linux by
# the generated version script.
WLFLAGS ?= $(SONAME_FLAG),$(LIBBIFROST_NAME)$(SO_EXT).$(LIBBIFROST_MAJOR)
ifeq ($(OS),Linux)
  # Indented with spaces on purpose: a line that starts with a TAB can be
  # taken for a recipe line by make.
  WLFLAGS := --version-script=$(LIBBIFROST_VERSION_FILE),$(WLFLAGS)
endif

# Link the versioned shared library, then create the major-version and
# unversioned symlinks that point at it.
# Note: $(LIB) must come after the object files on the link line.
$(LIBBIFROST_SO): $(LIBBIFROST_OBJS) $(LIBBIFROST_VERSION_FILE) $(CUDA_DEVICE_LINK_OBJ)
	@echo "Linking $(LIBBIFROST_SO_NAME)"
	mkdir -p $(LIB_DIR)
	$(LINKER) $(SHARED_FLAG) -Wl,$(WLFLAGS) -o $@ \
	  $(LIBBIFROST_OBJS) $(CUDA_DEVICE_LINK_OBJ) $(LIB) $(LDFLAGS)
	ln -sf $(LIBBIFROST_SO_NAME) $(LIBBIFROST_SO_STEM).$(LIBBIFROST_MAJOR)
	ln -sf $(LIBBIFROST_SO_NAME) $(LIBBIFROST_SO_STEM)
	@echo "Successfully built $(LIBBIFROST_SO_NAME)"

# Rebuild object files when the build configuration changes. The `*.o` target
# is a wildcard that make expands when it reads this file, so the extra
# prerequisite only attaches to object files that already exist at that point.
*.o: $(MAKEFILES)

# map.o additionally depends on the generated .jit files in JIT_SOURCES.
map.o: $(JIT_SOURCES)

# Helper program that turns a source file into its .jit form.
stringify: stringify.cpp
	$(CXX) -o $@ -Wall -O3 $<

# Produce X.jit from X by running it through ./stringify. The output goes to
# a temporary file first and is renamed only on success, so a failing
# stringify never leaves an empty .jit that make would treat as up to date.
%.jit: % stringify
	@echo "Building JIT version of $<"
	./stringify $< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Remove build products: objects, libraries, the version script, the library
# symlinks, the stringify helper and the dependency files.
clean:
	rm -f *.o *.so *.dylib *.a test_bifrost
	rm -f $(LIBBIFROST_VERSION_FILE)
	rm -f $(LIBBIFROST_SO)
	rm -f $(LIBBIFROST_SO_STEM).$(LIBBIFROST_MAJOR)
	rm -f $(LIBBIFROST_SO_STEM)
	rm -f stringify
	# DEPDIR is not defined in this file (presumably autodep.mk sets it --
	# confirm). Skip the step when it is empty, which would otherwise expand
	# to "rm -f /*.d", or when the directory is already gone, so that
	# repeated "make clean" runs succeed.
	if [ -n "$(DEPDIR)" ] && [ -d "$(DEPDIR)" ]; then \
	  rm -f "$(DEPDIR)"/*.d "$(DEPDIR)"/*.Td && rmdir "$(DEPDIR)"; \
	fi
.PHONY: clean

include autodep.mk
